data
# Load the reference
reference <- LoadReference(path = "human_bonemarrow/")
# Load the query object for mapping
query <- LoadFileInput(path = "../04_data_objects/SeuratObject_27subjects_ncounts_nfeatures_mito_3MAD_PCA_clustering.RDS")
# Preprocess with SCTransform
query <- SCTransform(
object = query,
assay = "RNA",
new.assay.name = "refAssay",
residual.features = rownames(x = reference$map),
reference.SCT.model = reference$map[["refAssay"]]@SCTModel.list$refmodel,
method = 'glmGamPoi',
ncells = 2000,
n_genes = 2000,
do.correct.umi = FALSE,
do.scale = FALSE,
do.center = TRUE
)
##
|
| | 0%
|
|====== | 8%
|
|============ | 17%
|
|================== | 25%
|
|======================= | 33%
|
|============================= | 42%
|
|=================================== | 50%
|
|========================================= | 58%
|
|=============================================== | 67%
|
|==================================================== | 75%
|
|========================================================== | 83%
|
|================================================================ | 92%
|
|======================================================================| 100%
# Find anchors between query and reference
anchors <- FindTransferAnchors(
reference = reference$map,
query = query,
k.filter = NA,
reference.neighbors = "refdr.annoy.neighbors",
reference.assay = "refAssay",
query.assay = "refAssay",
reference.reduction = "refDR",
normalization.method = "SCT",
features = intersect(rownames(x = reference$map), VariableFeatures(object = query)),
dims = 1:50,
n.trees = 20,
mapping.score.k = 100
)
# Transfer cell type labels and impute protein expression
refdata <- lapply(X = c("celltype.l1", "celltype.l2"), function(x) {
reference$map[[x, drop = TRUE]]
})
names(x = refdata) <- c("celltype.l1", "celltype.l2")
if (FALSE) {
refdata[["impADT"]] <- GetAssayData(
object = reference$map[['ADT']],
slot = 'data')}
#refdata[["impADT"]] <- GetAssayData(object = reference$map[['ADT']],
# slot = 'data')
query <- TransferData(
reference = reference$map,
query = query,
dims = 1:50,
anchorset = anchors,
refdata = refdata,
n.trees = 20,
store.weights = TRUE
)
## Warning: Keys should be one or more alphanumeric characters followed
## by an underscore, setting key from predictionscorecelltype.l1_ to
## predictionscorecelltypel1_
## Warning: Feature names cannot have underscores ('_'), replacing with dashes
## ('-')
## Warning: Keys should be one or more alphanumeric characters followed
## by an underscore, setting key from predictionscorecelltype.l2_ to
## predictionscorecelltypel2_
# Calculate the embeddings of the query data on the reference SPCA
query <- IntegrateEmbeddings(
anchorset = anchors,
reference = reference$map,
query = query,
reductions = "pcaproject",
reuse.weights.matrix = TRUE
)
## Warning: Keys should be one or more alphanumeric characters followed by an
## underscore, setting key from integrated_dr_ to integrateddr_
## Warning: Keys should be one or more alphanumeric characters followed by an
## underscore, setting key from integrated_dr_ to integrateddr_
## Warning: All keys should be one or more alphanumeric characters followed by an
## underscore '_', setting key to integrateddr_
# Calculate the query neighbors in the reference
# with respect to the integrated embeddings
query[["query_ref.nn"]] <- FindNeighbors(
object = Embeddings(reference$map[["refDR"]]),
query = Embeddings(query[["integrated_dr"]]),
return.neighbor = TRUE,
l2.norm = TRUE
)
# The reference used in the app is downsampled compared to the reference on which
# the UMAP model was computed. This step, using the helper function NNTransform,
# corrects the Neighbors to account for the downsampling.
query <- Azimuth:::NNTransform(
object = query,
meta.data = reference$map[[]]
)
#saveRDS(reference, file = "Azimuth_reference_seurat_object.RDS")
# Project the query to the reference UMAP.
query[["proj.umap"]] <- RunUMAP(
object = query[["query_ref.nn"]],
reduction.model = reference$map[["refUMAP"]],
reduction.key = 'UMAP_'
)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## Warning in RunUMAP.default(object = neighborlist, reduction.model =
## reduction.model, : Number of neighbors between query and reference is not equal
## to the number of neighbros within reference
## Warning: No assay specified, setting assay as RNA by default.
# Calculate mapping score and add to metadata
query <- AddMetaData(
object = query,
metadata = MappingScore(anchors = anchors),
col.name = "mapping.score"
)
visualization using ref assay
# First predicted metadata field, change to visualize other predicted metadata
id <- c("celltype.l1", "celltype.l2")
predicted.id <- paste0("predicted.", id)
# DimPlot of the reference
p1 <- DimPlot(object = reference$plot, reduction = "refUMAP", group.by = id[1], label = TRUE, repel = T) + NoLegend()
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
p2 <- DimPlot(object = reference$plot, reduction = "refUMAP", group.by = id[2], label = TRUE, repel = T) + NoLegend()
## Rasterizing points since number of points exceeds 100,000.
## To disable this behavior set `raster=FALSE`
#p3 <- DimPlot(object = reference$plot, reduction = "refUMAP", group.by = id[3], label = TRUE, repel = T) + NoLegend()
p1 + p2

ggsave("Azimuth_dimplot_reference.pdf", width =16, height = 5)
# DimPlot of the query, colored by predicted cell type
pp1 <- DimPlot(object = query, reduction = "proj.umap", group.by = predicted.id[1], label = TRUE, repel = T) + NoLegend()
pp2 <- DimPlot(object = query, reduction = "proj.umap", group.by = predicted.id[2], label = TRUE, repel = T,label.size = 3) + NoLegend()
#pp3 <- DimPlot(object = query, reduction = "proj.umap", group.by = predicted.id[3], label = TRUE, repel = T,label.size = 3) + NoLegend()
pp1 + pp2

ggsave("Azimuth_dimplot_query.pdf", width =16, height = 5)
p_group <- DimPlot(object = query, reduction = "proj.umap", group.by = "group_id", label = F, repel = T)
p_id <- DimPlot(object = query, reduction = "proj.umap", group.by = "orig.ident", label = F, repel = T)
p_group + p_id

ggsave("Azimuth_dimplot_diagnosis_sample.pdf", width = 15, height = 4)
cluster_p <- DimPlot(object = query, reduction = "proj.umap", group.by = "seurat_clusters", label = TRUE, repel = T) + NoLegend()
cluster_p + pp2
## Warning: ggrepel: 19 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

ggsave("clusters_cell_types2_annotation_dimplot.pdf", width =14, height = 7)
## Warning: ggrepel: 21 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
cluster_p + pp1
## Warning: ggrepel: 19 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

ggsave("clusters_cell_types1_annotation_dimplot.pdf", width =14, height = 7)
## Warning: ggrepel: 21 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
#cluster_p + pp3
#ggsave("clusters_cell_types3_annotation_dimplot.pdf", width =14, height = 7)
FeaturePlot(object = query, features = paste0(predicted.id, ".score"), reduction = "proj.umap",ncol = 2)

ggsave("featureplot_predicted_score.pdf", width=16, height=4)
VlnPlot(object = query,
features = paste0(predicted.id, ".score"),
group.by = predicted.id,
pt.size = 0) +
NoLegend()

ggsave("vlnplot_predicted_score.pdf", width=16, height=6)
# Plot the mapping score
FeaturePlot(object = query, features = "mapping.score", reduction = "proj.umap")

VlnPlot(object = query, features = "mapping.score", group.by = predicted.id) + NoLegend()

#saveRDS(query, file = "query_mapped.RDS")
visualization using RNA assay
DefaultAssay(query) <- "RNA"
pbmc <- FindVariableFeatures(query, selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(pbmc)
pbmc <- ScaleData(pbmc, features = all.genes)
## Centering and scaling data matrix
pbmc <- RunPCA(pbmc, features = VariableFeatures(object = pbmc), npcs = 50)
## PC_ 1
## Positive: SELM, C1S, PLAC9, C1R, PCOLCE, COL1A2, NNMT, SERPING1, COL6A2, NGFRAP1
## CCDC80, ISLR, LIMA1, PTRF, PPIC, NBL1, APP, FSTL1, SDC2, EFEMP2
## CALD1, MXRA8, CFI, LGALS3BP, FAM114A1, PRKCDBP, FKBP10, CAV1, COL6A1, SCARA5
## Negative: HLA-DRA, CD74, HLA-DRB1, C1QA, HLA-DPB1, HLA-DPA1, HLA-DMB, HLA-DQB1, ITGB2, CD83
## C1QC, C1QB, VSIG4, CD14, HCST, HLA-DQA1, LYZ, MNDA, ALOX5AP, PLAUR
## FCGR3A, RNASE1, MS4A4A, OLR1, GPR183, CPVL, IFI30, FOLR2, HLA-DRB5, CSTA
## PC_ 2
## Positive: ECSCR.1, TM4SF1, PLVAP, ACKR1, GNG11, ADGRL4, NPDC1, PECAM1, RAMP2, EMCN
## PCAT19, VWF, CLEC14A, BCAM, ESAM, JAM2, PDLIM1, CYYR1, MMRN2, NRN1
## ETS2, ERG, GIMAP7, CAV2, CXorf36, NOSTRIN, AQP1, MALL, PTPRB, MYCT1
## Negative: HLA-DRA, FN1, CTSB, HLA-DRB1, VSIG4, HLA-DMB, HLA-DPB1, GPNMB, HLA-DPA1, CTSD
## C1QA, CAPG, CD74, HLA-DQA1, FTL, ITGB2, HLA-DRB5, C1QC, C1QB, HLA-DQB1
## CD14, PLTP, TREM1, FCGR3A, MARCO, TGFBI, MS4A4A, PCOLCE, LYZ, CD83
## PC_ 3
## Positive: CD74, HLA-DRA, HLA-DRB1, HLA-DPA1, HLA-DPB1, HLA-DMB, HLA-DQA1, HLA-DQB1, HLA-DRB5, ITGB2
## CAPG, VSIG4, PLAUR, CPVL, CTSB, TUBA1B, CD83, CD14, MNDA, LYZ
## FCGR2B, IFI30, FCGR3A, C1QC, C1QA, C1QB, MARCO, CSTB, STX11, FTL
## Negative: DCN, FBLN1, AEBP1, SRPX, COL14A1, PODN, BICC1, PDGFRL, DPT, ECM2
## COL3A1, OLFML3, COL1A1, WISP2, MFAP4, CXCL12, THY1, FGF7, THBS2, EFEMP1
## CP, MXRA5, COL1A2, COL6A2, SFRP1, COL6A1, MMP2, ABCA8, NOVA1, ANGPTL5
## PC_ 4
## Positive: ITGBL1, FN1, PRG4, ERRFI1, ITGB8, CLIC5, GABRA4, CRTAC1, SEMA5A, DIRC3
## MT2A, CAV1, TMEM196, AK1, SPARCL1, SOX5, NTN4, VKORC1, PPP1R1A, HTRA1
## DNASE1L3, HBEGF, ZNF385B, FGF10, ANK3, BTC, MTUS2, SEMA3A, VEGFC, SLC2A12
## Negative: DCN, SERPINF1, FBLN2, PDGFRL, GSN, FBLN1, VCAN, PODN, FBLN5, CFD
## MFAP4, MFAP5, OLFML3, AEBP1, THY1, MGST1, DPT, SRPX, TUBA1A, C3
## ECM2, FAM180B, WISP2, ASPN, MMP2, SGCE, FHL1, CNN3, FBN1, SMOC2
## PC_ 5
## Positive: RNASE1, C1QA, C1QB, ALDH1A1, FTL, MARCO, FOLR2, C1QC, GPNMB, LGMN
## NUPR1, LILRB5, FCGR3A, TIMD4, MS4A4A, CTSD, TREM2, IFI27, CTSB, SEPP1
## PLTP, HPGDS, MSR1, FUCA1, CD163, GCHFR, RBP4, GPR34, VSIG4, APOC1
## Negative: CREM, CORO1A, IL2RG, CXCR4, CD48, JAML, CLEC10A, CD52, ICAM3, RAC2
## SAMSN1, RILPL2, CYTIP, GPR183, ISG20, GPAT3, CD3D, IL32, FCN1, FCER1A
## LGALS2, CST7, TRAC, CFP, CD1C, CD2, TUBA4A, STX11, HCST, DUSP4
pbmc <- RunUMAP(pbmc, dims = 1:50)
## 15:18:28 UMAP embedding parameters a = 0.9922 b = 1.112
## 15:18:28 Read 59845 rows and found 50 numeric columns
## 15:18:28 Using Annoy for neighbor search, n_neighbors = 30
## 15:18:28 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 15:18:37 Writing NN index file to temp file /tmp/Rtmpq8FfHN/file36e1aa7b50e7d0
## 15:18:37 Searching Annoy index using 1 thread, search_k = 3000
## 15:19:01 Annoy recall = 100%
## 15:19:02 Commencing smooth kNN distance calibration using 1 thread
## 15:19:05 Initializing from normalized Laplacian + noise
## 15:19:09 Commencing optimization for 200 epochs, with 2650316 positive edges
## 15:19:34 Optimization finished
## Warning: Cannot add objects with duplicate keys (offending key: UMAP_), setting
## key to 'umap_'
query <- pbmc
# DimPlot of the query, colored by predicted cell type
pp1 <- DimPlot(object = query, reduction = "umap", group.by = predicted.id[1], label = TRUE, repel = T) + NoLegend()
pp2 <- DimPlot(object = query, reduction = "umap", group.by = predicted.id[2], label = TRUE, repel = T,label.size = 3) + NoLegend()
#pp3 <- DimPlot(object = query, reduction = "proj.umap", group.by = predicted.id[3], label = TRUE, repel = T,label.size = 3) + NoLegend()
pp1 + pp2

ggsave("Azimuth_dimplot_query_umap.pdf", width =16, height = 5)
## Warning: ggrepel: 3 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
p_group <- DimPlot(object = query, reduction = "umap", group.by = "group_id", label = F, repel = T)
p_id <- DimPlot(object = query, reduction = "umap", group.by = "orig.ident", label = F, repel = T)
p_group + p_id

ggsave("Azimuth_dimplot_diagnosis_sample_umap.pdf", width = 15, height = 4)
cluster_p <- DimPlot(object = query, reduction = "umap", group.by = "seurat_clusters", label = TRUE, repel = T)+NoLegend()
cluster_p + pp2

ggsave("clusters_cell_types2_annotation_dimplot_umap.pdf", width =14, height = 7)
cluster_p + pp1

ggsave("clusters_cell_types1_annotation_dimplot_umap.pdf", width =14, height = 7)